Google 계정
박성준
tjdwnsqkr12@gmail.com
코드
아래에 코드 셀 삽입
Ctrl+M B
텍스트
텍스트 셀 추가
헤더 공개 상태 전환
노트북
코드 텍스트

Q1. 지역에 따라서 선호하는 게임 장르가 다를까 라는 질문에 대답

Q2. 연도별 게임의 트렌드가 있을까 라는 질문에 대답

Q3. 출고량이 높은 게임에 대한 분석 및 시각화 프로세스가 포함

=>다음 분기에 어떤 게임을 설계해야 할까

코드 텍스트

코드 텍스트

Name : 게임의 이름입니다.

Platform : 게임이 지원되는 플랫폼의 이름입니다.

Year : 게임이 출시된 연도입니다.

Genre : 게임의 장르입니다.

Publisher : 게임을 제작한 회사입니다.

NA_Sales : 북미지역에서의 출고량입니다.

EU_Sales : 유럽지역에서의 출고량입니다.

JP_Sales : 일본지역에서의 출고량입니다.

Other_Sales : 기타지역에서의 출고량입니다.

코드 텍스트

# Load the dataset; the first CSV column is used as the row index.
df = pd.read_csv('vgames.csv', index_col=0)
# Drop the Publisher column, which this analysis does not use.
df = df.drop(columns='Publisher')
df.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 16598 entries, 1 to 16598
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Name         16598 non-null  object 
 1   Platform     16598 non-null  object 
 2   Year         16597 non-null  float64
 3   Genre        16597 non-null  object 
 4   NA_Sales     16598 non-null  float64
 5   EU_Sales     16598 non-null  float64
 6   JP_Sales     16598 non-null  float64
 7   Other_Sales  16598 non-null  float64
dtypes: float64(5), object(3)
memory usage: 1.1+ MB
코드 텍스트

df.isna().sum() # count the missing values in each column
Name           0
Platform       0
Year           1
Genre          1
NA_Sales       0
EU_Sales       0
JP_Sales       0
Other_Sales    0
dtype: int64
코드 텍스트

df[(df['Year'].isnull()) | (df['Genre'].isnull())].head() # show only the rows where Year or Genre is missing

# NOTE(review): the original author remarks here that "two of them just won't show up" — unclear which rows are meant; confirm
코드 텍스트

# Drop every row that has a missing value. The explicit copy makes the result an
# independent frame, so later column assignments do not raise
# SettingWithCopyWarning.
df = df.dropna().copy()
코드 텍스트

# Convert Year to an integer dtype.
df['Year'] = df['Year'].astype('int')
/usr/local/lib/python3.7/dist-packages/pandas/core/generic.py:5516: SettingWithCopyWarning:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

코드 텍스트

# Treat years of 1900 or earlier as outliers and remove them.
df = df[df['Year'] > 1900].reset_index(drop=True)
# Remove fully duplicated rows (keeping the first occurrence) instead of dropping
# a hard-coded row label, so the cell keeps working if the data or the earlier
# cleaning steps change.
df = df.drop_duplicates().reset_index(drop=True)
코드 텍스트

df.duplicated().sum()# number of duplicated rows still present
0

df.isnull().sum() # final check of missing values per column
Name           0
Platform       0
Year           0
Genre          0
NA_Sales       0
EU_Sales       0
JP_Sales       0
Other_Sales    0
dtype: int64
코드 텍스트

df.nunique() # number of distinct values per column => used to see how many genres exist
Name           11489
Platform          31
Year              44
Genre             13
NA_Sales         409
EU_Sales         305
JP_Sales         244
Other_Sales      157
dtype: int64

# Cast each regional sales column to float so the columns can be summed later.
for sales_col in ('NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales'):
    df[sales_col] = df[sales_col].astype('float')
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16589 entries, 0 to 16588
Data columns (total 8 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Name         16589 non-null  object 
 1   Platform     16589 non-null  object 
 2   Year         16589 non-null  int64  
 3   Genre        16589 non-null  object 
 4   NA_Sales     16589 non-null  float64
 5   EU_Sales     16589 non-null  float64
 6   JP_Sales     16589 non-null  float64
 7   Other_Sales  16589 non-null  float64
dtypes: float64(4), int64(1), object(3)
memory usage: 1.0+ MB
코드 텍스트

# Total sales per game = sum of the four regional sales columns.
# The columns are selected by name rather than by position (iloc[:, 4:]) so that
# re-running the cell does not fold an existing 'Total' column back into the sum,
# and the row-wise sum is vectorized instead of looping over every row.
region_cols = ['NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales']
df['Total'] = df[region_cols].sum(axis=1)

# Pie chart: share of each genre in the whole dataset.
genre_counts = df.Genre.value_counts()  # computed once, sorted by count descending
labels = genre_counts.index
sizes = genre_counts.values
# Pull out the two largest slices. The list is sized from the data so the chart
# does not break when the number of genres differs from a hard-coded length.
explode = ([0.3, 0.1] + [0] * len(labels))[:len(labels)]
# visual
plt.figure(figsize=(7, 7))
plt.pie(sizes, explode=explode, labels=labels, colors=sns.color_palette('Set2'), autopct='%1.1f%%')
plt.title('Games According to Genre', fontsize=17, color='black')

# Distinct values of the Platform column, in order of first appearance.
platform1 = df['Platform'].unique()
platform1
array(['DS', 'Wii', 'PSP', 'PS3', 'PC', 'PS', 'GBA', 'PS4', 'PS2', 'XB',
       'X360', 'GC', '3DS', '2600', 'SAT', 'GB', 'NES', 'DC', 'N64',
       'XOne', 'SNES', 'WiiU', 'PSV', 'GEN', 'SCD', 'WS', 'NG', 'TG16',
       '3DO', 'GG', 'PCFX'], dtype=object)

# Pie chart: share of each platform in the whole dataset.
platform_counts = df.Platform.value_counts()  # computed once, sorted by count descending
labels = platform_counts.index
sizes = platform_counts.values
# Pull out the two largest slices. The list is sized from the data so the chart
# does not break when the number of platforms differs from a hard-coded length.
explode = ([0.3, 0.1] + [0] * len(labels))[:len(labels)]
# visual
plt.figure(figsize=(12, 15))
plt.pie(sizes, explode=explode, labels=labels, colors=sns.color_palette('Set2'), autopct='%1.1f%%')
plt.title('Games According to Platform', fontsize=17, color='black')

# Bar chart of the number of games in each genre.
# The data is passed as x= because newer seaborn releases no longer accept the
# vector as a bare positional argument.
sns.countplot(x=df.Genre)
plt.xticks(rotation=90)
plt.title("Genre", color="blue", fontsize=15)
코드 텍스트

# Bar chart of the number of games on each platform.
# The data is passed as x= because newer seaborn releases no longer accept the
# vector as a bare positional argument.
sns.countplot(x=df.Platform)
plt.xticks(rotation=90)
plt.title("Platforms", color="blue", fontsize=30)

# game = df.loc[df['Name']!='Wii Sports',['Name','NA_Sales']]
# game = game.sort_values('NA_Sales', ascending=False)
# game = game.head()

# fig = px.pie(game, names='Name', values='NA_Sales', template='seaborn')
# fig.update_traces(rotation=90, pull=0.06, textinfo="percent+label")
# fig.show()


# game1 = df.loc[df['Name']!='Wii Sports',['Name','EU_Sales']]
# game1= game1.sort_values('EU_Sales', ascending=False)
# game1.head()
# fig = px.pie(game1, names='Name', values='EU_Sales', template='seaborn')
# fig.update_traces(rotation=90, pull=0.06, textinfo="percent+label")
# fig.show()


# game2 = df.loc[:,['Name','JP_Sales']]
# game2 = game2.sort_values('JP_Sales', ascending=False)
# game2.head()
# fig = px.pie(game2, names='Name', values='JP_Sales', template='seaborn')
# fig.update_traces(rotation=90, pull=0.06, textinfo="percent+label")                  
# fig.show()


# game3 = df.loc[:,['Name','Other_Sales']]
# game3= game3.sort_values('Other_Sales', ascending=False)
# game3.head()
# fig = px.pie(game3, names='Name', values='Other_Sales', template='seaborn')
# fig.update_traces(rotation=90, pull=0.06, textinfo="percent+label")
# fig.show()

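# Intended to show each region's game sales as pie charts, but the cells kept hitting the runtime limit, so they are commented out. (For North America and Europe, Wii Sports was so dominant that it was excluded.)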

시각화


genre = df['Genre'].unique() # distinct genre values (original note: "number of genres")
genre_s = sorted(genre)
genre_s
['Action',
 'Adventure',
 'Fighting',
 'Misc',
 'Party',
 'Platform',
 'Puzzle',
 'Racing',
 'Role-Playing',
 'Shooter',
 'Simulation',
 'Sports',
 'Strategy']

# For every genre (in sorted order) total the sales of each region; the four
# lists below are aligned with genre_s.
rows_per_genre = [df[df.Genre == genre_name] for genre_name in genre_s]
na_sales = [rows.NA_Sales.sum() for rows in rows_per_genre]
eu_sales = [rows.EU_Sales.sum() for rows in rows_per_genre]
jp_sales = [rows.JP_Sales.sum() for rows in rows_per_genre]
other_sales = [rows.Other_Sales.sum() for rows in rows_per_genre]

# Stacked horizontal bar chart of sales by genre, one trace per region.
# Each entry: (per-genre totals, legend name, bar colour); order = stacking order.
region_traces = [
    (na_sales, 'North America Sales', 'skyblue'),
    (eu_sales, 'Europe Sales', 'cornsilk'),
    (jp_sales, 'Japan Sales', 'burlywood'),
    (other_sales, 'Other Region Sales', 'hotpink'),
]

fig = go.Figure()
for region_totals, trace_name, bar_color in region_traces:
    fig.add_trace(
        go.Bar(
            x=region_totals,
            y=genre_s,
            name=trace_name,
            marker_color=bar_color,
            orientation='h',
        )
    )
fig.update_layout(
    title_text='지역별 판매량이 높은 장르',
    xaxis_title="판매량",
    yaxis_title="장르",
    barmode='stack',
)
fig.show()

모든 지역에서 Action 장르가 1순위

2순위는 Sports


연도별 게임의 트렌드가 있을까


# One sub-frame per genre, selected with a boolean mask on the Genre column.
# NOTE(review): 'Party' is listed among the sorted genres but has no subset here — confirm this is intended.
xaction = df.loc[df["Genre"] == "Action"]
xsports = df.loc[df["Genre"] == "Sports"]
xmisc = df.loc[df["Genre"] == "Misc"]
xrole = df.loc[df["Genre"] == "Role-Playing"]
xshooter = df.loc[df["Genre"] == "Shooter"]
xadventure = df.loc[df["Genre"] == "Adventure"]
xrace = df.loc[df["Genre"] == "Racing"]
xplatform = df.loc[df["Genre"] == "Platform"]
xsimulation = df.loc[df["Genre"] == "Simulation"]
xfight = df.loc[df["Genre"] == "Fighting"]
xstrategy = df.loc[df["Genre"] == "Strategy"]
xpuzzle = df.loc[df["Genre"] == "Puzzle"]

# Stacked bar chart: total sales per platform, broken down by genre.
# Each entry: (genre sub-frame, legend name, bar colour); order = stacking order.
genre_traces = [
    (xaction, "Action", "rgb(119,172,238)"),
    (xsports, "Sports", 'rgb(21,90,174)'),
    (xrace, "Racing", "rgb(156,245,163)"),
    (xshooter, "Shooter", "rgb(14,135,23)"),
    (xmisc, "Misc", 'rgb(252,118,103)'),
    (xrole, "Role Playing", "rgb(226,28,5)"),
    (xfight, "Fighting", "rgb(247,173,13)"),
    (xplatform, "Platform", "rgb(242,122,13)"),
    (xsimulation, "Simulation", "rgb(188,145,202)"),
    (xadventure, "Adventure", 'rgb(104,57,119)'),
    (xstrategy, "Strategy", 'rgb(245,253,104)'),
    (xpuzzle, "Puzzle", 'rgb(138,72,40)'),
]

data = []
for genre_frame, trace_name, rgb in genre_traces:
    # Sum of 'Total' per platform for this genre; index = platform names.
    platform_totals = genre_frame.groupby("Platform")["Total"].sum()
    data.append(
        go.Bar(
            x=platform_totals.index,
            y=platform_totals.values,
            opacity=0.75,
            name=trace_name,
            marker=dict(color=rgb),
        )
    )

# Keep the individual trace names bound, as the original cell defined them.
(trace1, trace2, trace3, trace4, trace5, trace6,
 trace7, trace8, trace9, trace10, trace11, trace12) = data

layout = go.Layout(
    barmode='stack',
    title='Total  According to Platform and Genre',
    xaxis=dict(title='Platform'),
    yaxis=dict(title='Total(In Millions)'),
    paper_bgcolor='white',
    plot_bgcolor='white',
)
fig = go.Figure(data=data, layout=layout)
fig.show()
코드 텍스트

# Total = [sum for i in]

# x = df.groupby(['Year']).count()
# x = x['Total']
# y = x.index.astype(int)

# plt.figure(figsize=(20,14))
# colors = sns.color_palette("muted")
# ax = sns.barplot(y = y, x = x, orient='h', palette=colors)
# ax.set_xlabel(xlabel='Total_sales', fontsize=19)
# ax.set_ylabel(ylabel='Year', fontsize=22)
# ax.set_title(label='Total Sales per Year', fontsize=22)
# plt.show();                                                                                 #연도별 전체게임의 판매량

# Bar chart of total sales per release year.
# 'Total' is selected before aggregating so only that column is summed; summing
# the whole grouped frame would also aggregate the text columns.
y = df.groupby('Year')['Total'].sum()
x = y.index.astype(int)

plt.figure(figsize=(15, 7))
zz = sns.barplot(y=y, x=x)
zz.set_xlabel(xlabel='Year', fontsize=17)
zz.set_xticklabels(labels=x, fontsize=12, rotation=45)
zz.set_ylabel(ylabel='Total_Sales', fontsize=17)
zz.set_title(label='Total Sales per Year', fontsize=22)
plt.show()

코드 텍스트

# Bar chart of the 'Count' column of top_genre_count per year, with a text label
# above every bar.
# NOTE(review): top_genre_count and gerne are not defined in the visible part of
# the notebook; presumably built like top_platform_count / platform — confirm.
plt.figure(figsize=(25, 10))
a = sns.barplot(x='Year', y='Count', data=top_genre_count)
for index, value in enumerate(top_genre_count['Count'].values):
    # Label = "<genre>----<count>", placed slightly above the bar top.
    a.text(index, value + 5, str(gerne[index] + '----' + str(value)),
           color='#000', size=14, rotation=90, ha="center")
plt.xticks(rotation=0)
plt.show()

# For every year, find the platform with the most rows (games) in the dataset.
top_platform = df[['Year', 'Platform']]
# Number of rows per (Year, Platform) pair.
top_platform_df = top_platform.groupby(by=['Year', 'Platform']).size().reset_index(name='Count')
# Mask of the rows whose count equals the maximum count within their year.
top_platform_idx = top_platform_df.groupby(by=['Year'])['Count'].transform('max') == top_platform_df['Count']
top_platform_count = top_platform_df[top_platform_idx].reset_index(drop=True)
# When several platforms tie within a year, keep only the last one.
top_platform_count = top_platform_count.drop_duplicates(subset=["Year", "Count"], keep='last').reset_index(drop=True)

platform = top_platform_count['Platform']
코드 텍스트

# Bar chart of the top platform's game count per year, with a text label above
# every bar.
plt.figure(figsize=(25, 10))
a = sns.barplot(x='Year', y='Count', data=top_platform_count)
for index, value in enumerate(top_platform_count['Count'].values):
    # Label = "<platform>----<count>", placed slightly above the bar top.
    a.text(index, value + 5, str(platform[index] + '----' + str(value)),
           color='#000', size=14, rotation=90, ha="center")
plt.xticks(rotation=0)
plt.show()

1990~2002년까지는 Sports가 주류였으며, 그 이후부터는 Action 게임이 주류를 이룸

코드 텍스트

출고량이 높은 게임에 대한 분석 및 시각화 프로세스가 포함


코드 텍스트

# Detail rows for the five selected top-selling titles, ordered by name.
top_titles = [
    'Grand Theft Auto V',
    'Wii Sports',
    'Super Mario Bros.',
    'Tetris',
    'Mario Kart Wii',
]
zz = df[df['Name'].isin(top_titles)].sort_values(by=['Name'])
zz


def _plot_platform_yearly_sales(platform_name):
    """Draw a bar chart of total sales per year for a single platform.

    Args:
        platform_name: Value of the 'Platform' column to filter on, e.g. 'DS'.
    """
    platform_games = df[df['Platform'] == platform_name]
    # Select 'Total' before aggregating so only that column is summed; groupby
    # returns the years in ascending order.
    yearly_total = platform_games.groupby('Year')['Total'].sum()
    years = yearly_total.index.astype(int)

    plt.figure(figsize=(15, 7))
    ax = sns.barplot(y=yearly_total, x=years)
    ax.set_xlabel(xlabel='Year', fontsize=17)
    ax.set_xticklabels(labels=years, fontsize=12, rotation=45)
    ax.set_ylabel(ylabel='Total_Sales', fontsize=17)
    ax.set_title(label=f'Total Sales per Year({platform_name})', fontsize=22)
    plt.show()


# One chart per platform, in the same order as the original copy-pasted cells.
for platform_name in ['DS', 'PS', 'PS2', 'PS3', '3DS', 'PS4']:
    _plot_platform_yearly_sales(platform_name)